In [ ]:
 

Problem statement

The attributes of this dataset are: age, gender, heart rate, systolic blood pressure, diastolic blood pressure, blood sugar, ck-mb and troponin with negative or positive output. According to the provided information, the medical dataset classifies either heart attack or none. The gender column in the data is normalized: the male is set to 1 and the female to 0. The glucose column is set to 1 if it is > 120; otherwise, 0. As for the output, positive is set to 1 and negative to 0.

The CK-MB test is a blood test that looks for a specific enzyme. That enzyme, creatine kinase-myocardial band, is most common in your heart but can also mean you have damage to other muscles in your body.

In [1]:
#loading dataset
import pandas as pd
import numpy as np
#visualisation
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#EDA
from collections import Counter
import pandas_profiling as pp
# data preprocessing
from sklearn.preprocessing import StandardScaler
# data splitting
from sklearn.model_selection import train_test_split
# data modeling
from sklearn.metrics import confusion_matrix,accuracy_score,roc_curve,classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
#ensembling
C:\Users\anike\anaconda3\lib\site-packages\ipykernel_launcher.py:10: DeprecationWarning: `import pandas_profiling` is going to be deprecated by April 1st. Please use `import ydata_profiling` instead.
  # Remove the CWD from sys.path while we load stuff.
In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [3]:
data = pd.read_csv(r"C:\MACHINE LEARNING\heart disease\Medicaldataset.csv")
In [4]:
data.head()
Out[4]:
Age Gender Heart rate Systolic blood pressure Diastolic blood pressure Blood sugar CK-MB Troponin Result
0 64 1 66 160 83 160.0 1.80 0.012 negative
1 21 1 94 98 46 296.0 6.75 1.060 positive
2 55 1 64 160 77 270.0 1.99 0.003 negative
3 64 1 70 120 55 270.0 13.87 0.122 positive
4 55 1 64 112 65 300.0 1.08 0.003 negative
In [5]:
data.shape
Out[5]:
(1319, 9)
In [6]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1319 entries, 0 to 1318
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Age                       1319 non-null   int64  
 1   Gender                    1319 non-null   int64  
 2   Heart rate                1319 non-null   int64  
 3   Systolic blood pressure   1319 non-null   int64  
 4   Diastolic blood pressure  1319 non-null   int64  
 5   Blood sugar               1319 non-null   float64
 6   CK-MB                     1319 non-null   float64
 7   Troponin                  1319 non-null   float64
 8   Result                    1319 non-null   object 
dtypes: float64(3), int64(5), object(1)
memory usage: 92.9+ KB

EDA

In [7]:
pp.ProfileReport(data)
Out[7]:

In [8]:
data.columns
Out[8]:
Index(['Age', 'Gender', 'Heart rate', 'Systolic blood pressure',
       'Diastolic blood pressure', 'Blood sugar', 'CK-MB', 'Troponin',
       'Result'],
      dtype='object')
In [9]:
data.describe()
Out[9]:
Age Gender Heart rate Systolic blood pressure Diastolic blood pressure Blood sugar CK-MB Troponin
count 1319.000000 1319.000000 1319.000000 1319.000000 1319.000000 1319.000000 1319.000000 1319.000000
mean 56.191812 0.659591 78.336619 127.170584 72.269143 146.634344 15.274306 0.360942
std 13.647315 0.474027 51.630270 26.122720 14.033924 74.923045 46.327083 1.154568
min 14.000000 0.000000 20.000000 42.000000 38.000000 35.000000 0.321000 0.001000
25% 47.000000 0.000000 64.000000 110.000000 62.000000 98.000000 1.655000 0.006000
50% 58.000000 1.000000 74.000000 124.000000 72.000000 116.000000 2.850000 0.014000
75% 65.000000 1.000000 85.000000 143.000000 81.000000 169.500000 5.805000 0.085500
max 103.000000 1.000000 1111.000000 223.000000 154.000000 541.000000 300.000000 10.300000
In [10]:
# checking whether data contains null values
data.isnull().sum()
Out[10]:
Age                         0
Gender                      0
Heart rate                  0
Systolic blood pressure     0
Diastolic blood pressure    0
Blood sugar                 0
CK-MB                       0
Troponin                    0
Result                      0
dtype: int64

Data pre-processing

In [11]:
for i in data.columns:
    print({i:data[i].unique()})
#to get all unique values
{'Age': array([ 64,  21,  55,  58,  32,  63,  44,  67,  54,  47,  61,  86,  45,
        37,  60,  48,  52,  30,  50,  72,  42,  35,  68,  56,  65,  34,
        40,  46,  38,  57,  28,  49,  29,  80,  90,  62,  53,  75,  66,
        19,  77,  71,  43,  51,  59,  20,  36,  70,  78,  69,  73,  41,
        82,  25,  26,  76,  33,  39,  91,  31,  74,  22,  79,  81,  27,
        83,  24,  85,  88, 100,  23,  14,  87, 103,  84], dtype=int64)}
{'Gender': array([1, 0], dtype=int64)}
{'Heart rate': array([  66,   94,   64,   70,   61,   40,   60,   76,   81,   73,   72,
         92,  135,   63,   65,  125,   62,   58,   93,   96,   95,   97,
         91,   87,   77,   80,   82,   83,   78,   90,   59,   57,   98,
       1111,  102,  103,  105,   74,   85,   75,   71,   68,   67,   56,
         89,   88,   86,   79,  100,   69,   84,  110,  120,  122,  119,
        116,  114,   55,   53,   54,  117,  112,  108,  134,  111,  101,
        113,   51,   52,   99,  132,   50,  107,  104,   49,   46,   20,
         36,   45], dtype=int64)}
{'Systolic blood pressure': array([160,  98, 120, 112, 179, 214, 154, 166, 150, 199, 122, 118, 114,
       100, 107, 109, 151, 110, 104, 106, 152, 134, 135, 131, 137, 121,
       145, 136, 156, 155, 105,  91, 101, 111, 115, 133, 153, 125, 130,
       127, 140, 141, 128, 123,  94,  95, 117, 124, 119, 138, 157, 202,
       175, 144, 129,  97, 116, 113, 148, 164, 192, 171, 108, 132,  85,
        89,  87,  99,  96,  86,  70,  71,  78, 126, 170, 169, 149, 168,
       191, 193, 167, 165, 146,  93, 180,  42, 220, 147, 142, 102, 103,
       143, 139, 178,  80,  84,  83,  65, 208,  76,  90,  92, 183,  88,
       158, 161, 176, 159, 198, 204, 200, 162, 177, 223,  67, 209],
      dtype=int64)}
{'Diastolic blood pressure': array([ 83,  46,  77,  55,  65,  58,  68,  82,  81,  95,  90,  99,  67,
        70,  66,  86,  78,  60,  85,  63,  57,  61,  49,  62,  76,  75,
        71,  52,  80,  74,  73,  69,  94,  88,  64,  59,  93,  54,  56,
        44,  72,  89,  51,  50,  79,  47,  92, 103,  97, 110,  84, 104,
       100,  42, 128, 107,  91,  98,  38,  53,  48,  87, 106,  41,  40,
       105, 102, 118, 109,  43,  45,  96, 154], dtype=int64)}
{'Blood sugar': array([160. , 296. , 270. , 300. ,  87. , 102. , 135. , 100. , 198. ,
        92. ,  97. , 319. , 134. ,  96. , 274. ,  89. , 301. , 227. ,
       107. , 269. , 111. , 101. ,  95. , 279. , 166. , 321. ,  98. ,
       105. , 136. ,  82. , 117. , 120. , 208. , 125. , 103. ,  93. ,
        99. , 228. , 238. , 133. , 113. ,  91. , 114. , 149. , 110. ,
       251. , 191. , 334. , 109. , 201. , 167. ,  85. , 112. , 123. ,
        86. , 177. ,  90. , 115. , 392. , 147. , 141. , 222. , 174. ,
       162. , 219. , 189. , 193. , 181. , 387. , 121. , 294. , 116. ,
        88. , 240. , 132. , 159. ,  81. , 266. , 142. , 244. , 130. ,
       182. ,  94. ,  83. , 241. , 318. ,  66. , 156. , 108. , 322. ,
       187. , 122. , 362. , 180. , 127. , 131. ,  84. , 137. , 242. ,
       106. , 197. , 152. , 169. , 347. , 104. , 165. , 126. , 215. ,
        61. ,  80. , 195. , 150. , 194. , 233. , 462. , 422. , 245. ,
       168. , 188. , 129. , 200. , 146. , 140. , 382. , 217. , 303. ,
       154. , 221. , 186. , 218. ,  68. , 175. ,  50. , 203. , 119. ,
        77. , 431. , 202. , 118. , 184. , 246. , 157. , 155. , 230. ,
       225. , 368. , 408. , 302. , 500. , 145. , 541. , 163. ,  76. ,
        78. , 415. , 170. , 351. , 204. , 443. , 407. , 290. , 144. ,
       220. , 381. , 196. , 190. , 267. , 256. , 257. , 261. , 234. ,
       139. , 235. , 404. , 425. , 366. , 210. , 226. , 207. , 161. ,
       211. , 258. , 342. , 276. , 216. , 124. , 143. , 249. , 185. ,
       336. , 324. , 263. ,  60. , 128. , 346. , 151. , 232. , 271. ,
       283. , 148. , 293. , 305. , 338. , 282. ,  79. , 354. , 277. ,
       153. , 164. , 328. , 223. ,  74. , 239. , 231. , 229. ,  67. ,
        62. , 262. , 246.7, 138. , 391. ,  35. , 252. ,  75. , 224. ,
        69. , 272. , 314. , 247. , 253. , 434. ,  64. , 297. ,  65. ,
       406. , 206. , 171. , 259. , 265. , 250. , 329. , 285. , 192. ,
       331. ])}
{'CK-MB': array([  1.8  ,   6.75 ,   1.99 ,  13.87 ,   1.08 ,   1.83 ,   0.71 ,
       300.   ,   2.35 ,   2.84 ,   2.39 ,   3.43 ,   1.42 ,   2.57 ,
         1.49 ,   1.11 ,   0.606,   2.89 ,   1.6  ,  94.79 ,   0.665,
        50.46 ,  38.72 ,   2.11 ,   2.93 ,   1.61 ,   0.493,   1.31 ,
         4.58 ,   6.48 ,   0.929,   1.37 ,   6.78 ,   4.24 ,   1.3  ,
         0.609,  15.23 ,   1.54 ,  16.95 ,   2.97 ,   4.22 ,   1.29 ,
         4.8  ,   1.33 ,   1.19 ,   0.78 ,   2.28 ,   4.39 ,  19.47 ,
         2.41 ,   3.18 ,  36.24 ,   2.21 ,   2.19 ,   5.33 ,   5.22 ,
         1.63 ,   1.24 ,   5.8  ,   3.29 ,   0.937,   4.45 ,   4.02 ,
        18.15 ,   0.865,   3.3  ,   0.718,   3.45 ,   7.65 ,   4.3  ,
         0.994,   1.53 ,  31.97 ,   2.91 ,   3.2  ,   9.35 ,  12.02 ,
         4.66 ,   4.18 ,   5.81 ,   0.633,   2.69 ,   1.06 ,   4.82 ,
         2.13 ,   2.85 ,   6.91 ,   1.98 ,  19.5  ,   0.468, 165.1  ,
         1.64 ,   1.87 ,   1.69 ,   3.27 ,   3.75 ,   1.51 ,   2.16 ,
         5.27 ,   1.96 ,  40.99 ,  96.08 ,  51.9  ,  74.45 ,   8.84 ,
         6.28 ,   2.2  ,  49.8  ,   3.46 ,   2.27 ,   2.15 ,   0.452,
         2.   ,  35.55 ,   3.25 ,  21.61 ,   2.26 ,  14.21 ,   4.16 ,
         1.5  ,   1.73 ,   1.28 ,   2.46 ,   2.38 ,   4.61 ,   1.36 ,
         2.58 , 264.4  ,   0.687,  20.71 ,   7.02 ,   2.42 ,   4.37 ,
         4.76 ,   3.84 ,   2.74 ,   1.65 ,   1.27 ,   1.2  ,   0.743,
        39.53 ,   5.6  ,   3.58 ,   4.56 ,   3.26 ,   2.37 ,   3.36 ,
        20.21 ,   4.43 ,   3.32 ,  16.08 ,   3.09 ,   6.14 ,   0.683,
         6.17 ,   8.57 ,   1.76 ,   2.99 , 286.9  ,   0.487,   3.77 ,
         1.67 ,   2.82 ,   0.998,  25.74 ,   0.596,  16.62 ,   1.81 ,
         7.32 ,   2.96 ,   4.55 ,   2.94 ,   4.69 ,   2.14 ,   0.676,
         0.879,   2.31 ,  20.46 ,  25.36 ,   2.73 ,   3.7  ,   3.24 ,
         1.46 ,  98.48 ,   3.86 ,   2.61 ,   4.   ,   3.35 ,   4.73 ,
         4.29 ,   2.7  ,   1.16 ,   6.27 ,   5.16 ,   3.85 ,  14.07 ,
       101.9  ,   3.   ,   1.77 , 134.7  ,   2.78 ,   0.722,   2.9  ,
         2.98 ,   2.49 ,   3.23 ,   2.04 ,   1.9  ,   3.14 ,  16.55 ,
         2.33 ,  10.78 ,  13.97 ,   1.72 ,   1.68 ,   2.03 ,   1.82 ,
         2.05 ,   2.72 ,   7.64 ,   0.813,   5.86 ,  43.83 ,   0.821,
        15.67 ,   3.76 ,  99.56 ,   2.12 ,   5.46 ,   3.64 ,   7.19 ,
         6.09 ,   2.64 ,   0.961,   1.22 ,   1.93 ,   3.94 ,   8.23 ,
         5.37 ,   1.41 ,   2.5  ,   7.01 ,   2.09 ,   1.43 ,  64.86 ,
         0.51 ,   0.958,   2.54 ,   2.25 ,   0.921,   3.11 ,  31.2  ,
         1.45 ,   2.22 ,  23.97 ,  95.34 ,   7.97 ,   2.24 , 104.3  ,
        13.92 ,   6.4  ,   2.6  ,   1.85 ,   3.97 ,  18.96 ,   5.23 ,
         4.28 ,  25.97 ,  15.74 ,   3.98 ,   2.68 ,   5.43 ,   3.08 ,
        12.3  ,   0.353,   8.87 ,   6.13 ,  10.44 ,   0.979,   5.83 ,
        12.41 ,  34.36 ,   3.04 ,  21.04 ,   2.8  ,   5.87 ,   2.65 ,
         7.91 ,   5.34 ,  11.32 ,   3.93 ,   2.51 ,   1.47 ,   0.942,
         8.15 ,   6.11 ,   0.583,   8.93 ,   1.71 ,  17.61 ,   1.05 ,
         3.28 ,   0.966,   3.52 ,  96.02 ,   0.345,   7.44 ,   0.607,
         2.92 ,   5.39 ,   5.36 ,   1.26 ,   2.18 ,   0.755,   4.71 ,
        18.41 ,   2.55 ,  17.95 ,  38.94 ,   6.9  ,   1.34 ,   0.745,
        23.11 ,  12.22 ,   1.78 ,  17.3  ,   3.48 ,   4.23 ,   9.9  ,
       185.1  ,   2.71 , 207.5  ,   1.74 ,   1.92 ,   1.03 , 165.   ,
         4.11 , 266.3  ,   6.3  ,   1.94 ,   7.52 ,  40.6  ,   2.3  ,
        17.04 ,   1.97 ,  14.01 ,  11.73 ,   1.95 ,   8.37 ,   3.05 ,
         4.01 ,   1.58 , 142.6  ,   9.06 ,   5.41 ,  22.91 ,   6.25 ,
         5.11 ,  70.89 ,   3.68 ,   7.55 ,   4.49 ,   0.726,   8.4  ,
         1.79 ,   5.82 ,   8.38 ,   4.47 ,   7.29 ,   4.25 ,  27.06 ,
        12.39 ,  11.24 ,   6.66 ,   7.66 ,   1.25 ,   5.73 ,   3.49 ,
         3.42 ,   0.657,  15.83 ,   0.604,   0.515,   8.86 ,  10.33 ,
         1.18 ,  61.1  ,   4.36 ,   0.529,   1.35 ,   4.74 ,  13.98 ,
        36.53 ,   2.67 ,   1.21 ,   5.51 ,   5.61 ,   3.03 ,   9.96 ,
         3.1  ,  11.48 ,  24.2  , 259.7  ,   1.59 ,   2.4  ,  56.39 ,
        33.87 ,  12.2  ,   1.4  , 147.4  ,   1.75 ,   4.62 ,   1.44 ,
         1.66 ,   0.649,   8.32 ,   4.06 ,   2.87 ,   2.56 ,   0.907,
         4.1  ,   1.13 ,  23.8  ,   8.34 ,   3.21 ,  58.29 ,  13.73 ,
         5.02 ,   4.2  ,   1.86 ,  33.48 ,   8.95 ,   6.04 ,   5.75 ,
         1.52 ,   4.96 ,   5.77 ,  19.01 ,  52.94 ,   3.38 , 297.5  ,
         3.73 ,  15.69 ,   7.73 , 254.4  ,   1.02 ,  10.04 ,   1.09 ,
         2.86 ,   4.15 ,   3.83 ,   6.71 ,   2.02 ,  17.32 ,  14.22 ,
         3.69 ,   4.72 ,   3.47 ,   1.62 , 217.5  ,   4.93 ,   2.53 ,
        12.26 ,   3.95 ,   3.61 ,   3.17 ,   8.8  ,   3.87 ,   2.63 ,
        11.64 ,   0.799,   8.12 ,   1.57 ,   0.925,   1.7  ,   4.68 ,
         1.04 ,   4.03 ,  15.88 ,   5.68 ,   1.07 ,   4.6  ,   6.01 ,
         3.4  ,   5.15 ,  37.69 ,   3.65 ,   4.95 ,  72.6  ,  32.77 ,
         0.796,   0.704,   2.34 ,  63.13 ,   7.48 ,   0.88 ,  31.4  ,
         1.39 ,   1.01 ,   8.08 ,   0.457,   7.26 , 177.9  ,  42.15 ,
        79.41 ,   0.92 ,  43.06 ,   4.14 ,   3.96 , 251.4  ,   0.982,
         9.05 ,   6.38 ,   2.79 ,  89.61 ,  17.63 ,   5.1  ,   0.674,
       190.7  , 144.9  , 201.7  ,   2.59 ,   6.36 ,  25.1  ,   6.19 ,
        89.22 ,   1.17 ,  10.11 ,   3.15 ,   9.82 ,  63.08 ,   9.09 ,
         1.48 ,  44.18 ,   3.53 ,   4.26 ,   4.21 ,  19.98 ,   1.1  ,
         5.49 ,   0.856,   3.39 ,   3.59 ,   1.12 ,   3.33 ,   3.12 ,
         0.858,  21.51 ,  66.32 ,   0.516,   2.1  , 107.3  ,  25.63 ,
        11.94 ,   2.36 ,  28.41 ,   5.85 ,   2.83 ,   5.66 ,   2.62 ,
        33.09 ,   4.07 ,  81.65 ,   4.08 ,  17.22 , 208.6  ,   3.41 ,
         3.5  ,   3.13 ,  11.07 ,   6.61 ,   4.4  ,   4.78 ,   0.785,
        15.32 ,   3.78 ,   1.32 ,  13.7  , 111.   ,  10.18 ,   6.63 ,
         1.88 ,   4.79 ,  60.68 ,   0.898,  81.84 , 247.8  ,   3.07 ,
         2.47 ,   4.84 ,   3.72 ,   2.43 ,  10.75 ,   5.01 ,   0.321,
         3.63 ,  71.77 ,   4.34 ,   1.15 ,  31.85 ,   9.63 ,  11.87 ,
         4.67 ,   7.67 ,   2.52 ,   4.87 ,  79.62 ,   3.57 ,  32.53 ,
         0.706,   4.19 ,   8.54 ,   7.61 ,  69.32 ,   4.41 ,   4.57 ,
        11.45 ,   7.3  ,   6.57 ,   0.68 ,   7.03 ,  19.63 ,   5.17 ,
         2.66 ,   8.66 ,  61.2  ,  14.97 ,   3.71 ,   4.13 ,   0.863,
         7.47 ,   6.74 ,   4.35 ,  16.1  ,  12.89 ,  10.26 ,  18.43 ,
         8.47 ,   8.49 ,  24.64 ,  43.51 ,  46.1  ,  88.28 ,   0.569,
         7.06 ,  33.95 ,   5.57 ,  51.96 ,   2.17 ,   0.681,   0.826,
         0.89 ,   9.71 ,  10.42 ,   1.   ,   6.03 ,   4.05 ,  99.62 ,
         0.728,   2.88 ,   5.65 ,   4.17 ,   0.684,   6.67 ,  19.26 ,
        31.06 ,  39.63 ,  78.89 ,   6.15 ,   2.07 ,   8.14 ,  42.5  ,
        25.56 ,  31.22 ,   5.08 ,   8.69 ,   3.81 ,  27.31 ,   9.51 ,
         8.21 ,   5.58 ,   4.64 ,   0.746,  11.4  ,  25.04 ,   6.41 ,
         0.483,  33.7  ,   0.973,   1.89 ,   4.32 ,   6.47 ,  39.34 ,
        27.57 ,  12.57 ,   5.78 ,  14.72 ,   7.05 , 261.   ,  50.89 ])}
{'Troponin': array([1.20e-02, 1.06e+00, 3.00e-03, 1.22e-01, 4.00e-03, 2.37e+00,
       1.10e-02, 6.00e-03, 1.30e-02, 5.37e+00, 1.70e-02, 7.76e-01,
       2.00e-02, 5.00e-03, 4.91e-01, 6.12e-01, 1.39e+00, 7.00e-03,
       1.00e+01, 8.30e+00, 2.10e-02, 1.15e+00, 1.46e-01, 8.00e-03,
       2.60e-02, 5.30e-02, 9.00e-03, 6.70e-02, 4.00e-02, 1.00e-02,
       3.10e-02, 1.50e-02, 7.60e-02, 5.20e-02, 1.01e+00, 8.90e-02,
       2.80e-02, 7.03e-01, 8.50e-02, 2.19e-01, 8.64e-01, 1.05e-01,
       4.80e-02, 8.88e-01, 1.60e-02, 1.07e+00, 2.20e-02, 6.05e+00,
       7.10e-02, 1.03e-01, 2.30e-02, 3.80e-02, 5.10e-02, 2.90e-02,
       1.40e-02, 2.23e+00, 1.55e+00, 1.84e+00, 6.40e-01, 7.67e+00,
       6.10e-02, 9.40e-02, 2.70e-02, 5.40e-02, 2.52e-01, 1.79e+00,
       1.95e+00, 3.92e-01, 3.27e-01, 4.60e-02, 1.24e+00, 1.78e-01,
       1.90e-02, 1.97e+00, 6.81e-01, 1.06e-01, 1.46e+00, 6.30e-02,
       1.23e+00, 2.86e+00, 1.64e-01, 1.86e+00, 3.20e-02, 2.40e-02,
       2.50e-02, 1.42e-01, 2.99e+00, 1.00e-03, 1.71e-01, 2.80e-01,
       9.70e-02, 2.00e-03, 3.39e+00, 1.33e+00, 4.26e-01, 6.80e-02,
       3.53e-01, 8.16e-01, 5.98e-01, 7.70e-01, 3.00e-02, 3.40e-02,
       2.00e-01, 2.92e-01, 7.00e-02, 3.24e-01, 1.18e+00, 2.45e-01,
       1.12e-01, 5.05e+00, 2.67e-01, 1.88e-01, 3.60e-02, 1.79e-01,
       5.18e-01, 4.25e-01, 4.31e-01, 2.73e+00, 1.45e-01, 3.31e-01,
       4.01e-01, 2.88e-01, 1.80e-02, 4.20e-02, 2.96e+00, 9.50e-02,
       2.62e-01, 2.42e+00, 8.10e-02, 1.17e-01, 4.12e-01, 6.93e-01,
       3.72e-01, 3.50e-02, 3.85e-01, 1.25e+00, 5.54e-01, 1.83e+00,
       1.35e+00, 9.29e-01, 9.80e-02, 3.90e-02, 1.96e+00, 2.63e+00,
       4.92e-01, 6.20e-02, 9.88e-01, 1.77e-01, 9.60e-02, 3.28e+00,
       1.87e-01, 2.03e-01, 1.62e+00, 5.31e+00, 3.23e+00, 5.80e-02,
       4.40e-02, 2.71e-01, 4.32e+00, 1.21e+00, 4.54e-01, 1.63e+00,
       2.48e+00, 5.48e+00, 1.38e-01, 4.52e-01, 9.11e+00, 9.80e-01,
       2.53e+00, 6.28e-01, 1.01e-01, 7.58e-01, 2.39e+00, 2.34e+00,
       6.97e-01, 8.53e-01, 9.92e-01, 4.57e-01, 6.27e-01, 1.96e-01,
       1.64e+00, 1.19e-01, 6.43e-01, 1.76e+00, 3.70e-02, 1.28e+00,
       6.53e-01, 2.47e+00, 2.35e+00, 4.30e-02, 6.62e-01, 1.92e-01,
       1.00e-01, 2.87e+00, 1.54e-01, 2.81e+00, 1.91e-01, 8.66e-01,
       1.32e+00, 1.65e-01, 1.30e+00, 1.36e+00, 2.09e-01, 3.85e+00,
       8.47e-01, 5.23e-01, 1.48e+00, 9.90e-02, 1.47e+00, 4.10e-02,
       1.50e+00, 1.29e+00, 9.59e-01, 1.30e-01, 4.90e-02, 1.36e-01,
       2.18e-01, 9.37e-01, 3.21e+00, 1.31e+00, 8.17e-01, 2.32e+00,
       1.17e+00, 7.50e-02, 1.43e+00, 8.20e-02, 3.14e-01, 3.83e+00,
       1.44e+00, 5.00e-02, 2.57e+00, 8.51e-01, 2.12e+00, 1.60e+00,
       2.98e-01, 9.10e-01, 1.14e-01, 4.07e-01, 9.99e-01, 8.70e-02,
       4.85e-01, 6.90e-02, 1.85e+00, 2.06e+00, 7.01e-01, 7.06e-01,
       4.06e-01, 7.70e-02, 1.58e+00, 5.87e+00, 1.09e-01, 6.10e-01,
       8.63e-01, 7.22e-01, 3.25e-01, 1.38e+00, 1.40e+00, 6.49e-01,
       3.57e-01, 2.43e+00, 3.54e-01, 6.50e-02, 2.08e-01, 1.29e-01,
       1.41e-01, 4.02e-01, 9.12e-01, 1.93e-01, 4.50e-02, 1.10e+00,
       2.20e+00, 1.04e-01, 1.45e+00, 4.84e-01, 7.40e-01, 5.40e-01,
       8.30e-01, 8.40e-01, 1.75e+00, 8.80e-01, 9.40e-01, 1.16e-01,
       2.20e-01, 2.80e+00, 7.60e-01, 4.05e-01, 2.30e-01, 2.24e-01,
       6.01e+00, 2.99e-01, 6.09e-01, 7.90e-01, 7.10e-01, 6.02e-01,
       1.52e-01, 8.10e-01, 1.03e+01, 8.80e-02, 5.90e-02, 6.00e-02,
       5.53e-01, 7.40e-02, 5.49e-01, 7.95e-01, 1.51e+00, 1.47e-01,
       2.29e+00, 2.50e+00, 8.30e-02, 8.60e-02, 3.97e-01, 9.98e-01,
       1.26e+00, 7.80e-02, 5.07e-01, 3.48e-01, 5.40e+00, 2.16e+00,
       3.77e+00, 5.11e+00, 9.30e-01, 8.00e-02, 7.00e-01, 1.28e-01,
       1.81e+00, 1.94e+00, 5.70e-02, 6.35e-01, 9.16e-01, 6.60e-02,
       5.50e-02, 2.79e-01, 3.34e+00, 9.23e-01, 9.20e-02, 4.34e-01,
       2.95e-01, 4.61e-01, 7.20e-02, 1.97e-01, 1.10e-01, 1.53e+00,
       1.04e+00, 5.44e-01, 2.55e-01, 1.37e+00, 4.50e-01, 2.63e-01,
       1.72e-01, 4.25e+00, 3.59e-01, 1.77e+00])}
{'Result': array(['negative', 'positive'], dtype=object)}
In [12]:
# have converted the categorical data to numerical manually as the data containns only one object variable
#replacing values
data['Result'].replace(['negative', 'positive'],
                        [0, 1], inplace=True)
In [13]:
# now lets check whether it converted or not
data.dtypes
Out[13]:
Age                           int64
Gender                        int64
Heart rate                    int64
Systolic blood pressure       int64
Diastolic blood pressure      int64
Blood sugar                 float64
CK-MB                       float64
Troponin                    float64
Result                        int64
dtype: object

Data Visualization

In [14]:
p = data.Gender.value_counts().plot(kind='bar')

From the above Visualization we can predict that we have greater number of MALE patients than the FEMALE patient

In [15]:
plt.hist(data['Age'])
Out[15]:
(array([ 14.,  55., 107., 207., 303., 356., 214.,  48.,  12.,   3.]),
 array([ 14. ,  22.9,  31.8,  40.7,  49.6,  58.5,  67.4,  76.3,  85.2,
         94.1, 103. ]),
 <BarContainer object of 10 artists>)
In [16]:
plt.hist(data['Heart rate'])
Out[16]:
(array([1307.,    9.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           3.]),
 array([  20. ,  129.1,  238.2,  347.3,  456.4,  565.5,  674.6,  783.7,
         892.8, 1001.9, 1111. ]),
 <BarContainer object of 10 artists>)
In [17]:
plt.hist(data['Systolic blood pressure'])
Out[17]:
(array([  1.,  17., 116., 302., 402., 262., 133.,  49.,  20.,  17.]),
 array([ 42. ,  60.1,  78.2,  96.3, 114.4, 132.5, 150.6, 168.7, 186.8,
        204.9, 223. ]),
 <BarContainer object of 10 artists>)
In [18]:
plt.hist(data['Diastolic blood pressure'])
Out[18]:
(array([ 43., 284., 352., 415., 159.,  53.,  10.,   2.,   0.,   1.]),
 array([ 38. ,  49.6,  61.2,  72.8,  84.4,  96. , 107.6, 119.2, 130.8,
        142.4, 154. ]),
 <BarContainer object of 10 artists>)
In [19]:
plt.hist(data['Blood sugar'])
Out[19]:
(array([ 86., 743., 204., 136.,  65.,  41.,  20.,  16.,   4.,   4.]),
 array([ 35. ,  85.6, 136.2, 186.8, 237.4, 288. , 338.6, 389.2, 439.8,
        490.4, 541. ]),
 <BarContainer object of 10 artists>)
In [20]:
plt.hist(data['CK-MB'])
Out[20]:
(array([1.195e+03, 4.900e+01, 2.000e+01, 1.400e+01, 4.000e+00, 3.000e+00,
        5.000e+00, 1.000e+00, 7.000e+00, 2.100e+01]),
 array([  0.321 ,  30.2889,  60.2568,  90.2247, 120.1926, 150.1605,
        180.1284, 210.0963, 240.0642, 270.0321, 300.    ]),
 <BarContainer object of 10 artists>)
In [21]:
plt.hist(data['Troponin'])
Out[21]:
(array([1.19e+03, 6.90e+01, 2.80e+01, 9.00e+00, 4.00e+00, 7.00e+00,
        0.00e+00, 1.00e+00, 2.00e+00, 9.00e+00]),
 array([1.0000e-03, 1.0309e+00, 2.0608e+00, 3.0907e+00, 4.1206e+00,
        5.1505e+00, 6.1804e+00, 7.2103e+00, 8.2402e+00, 9.2701e+00,
        1.0300e+01]),
 <BarContainer object of 10 artists>)
In [22]:
p = data.Result.value_counts().plot(kind='bar')
In [23]:
data['Result'].value_counts()
# here we can see that the data is unevenly distributed between the two classes
# class 1 has the highest number of patients rather then the class 0
Out[23]:
1    810
0    509
Name: Result, dtype: int64

The above visualization it is clearly visible that our dataset is completly imbalanced in fact the number of patients who are suffering from the Heart-attack is half of the patients who are not having Heart-attack

Checking the Correlation between all the features

In [24]:
plt.figure(figsize=(12,10))
p = sns.heatmap(data.corr(),annot=True,cmap='Reds')

Scaling the data

In [25]:
# create X & Y
X = data.values[:,0:-1]
Y = data.values[:,-1]
In [26]:
print(X.shape)
print(Y.shape)
(1319, 8)
(1319,)
In [27]:
# now scale the data

from sklearn.preprocessing import StandardScaler

scaler =StandardScaler()

scaler.fit(X)
X=scaler.transform(X)
#X=scaler.fit_transform(X)
print(X)

# it transforms the data into the specific scale so that it makes data points cloder to each other
[[ 5.72357956e-01  7.18395402e-01 -2.39032215e-01 ...  1.78459449e-01
  -2.90961900e-01 -3.02342376e-01]
 [-2.57963993e+00  7.18395402e-01  3.03491001e-01 ...  1.99434379e+00
  -1.84072428e-01  6.05700979e-01]
 [-8.73625310e-02  7.18395402e-01 -2.77783874e-01 ...  1.64718943e+00
  -2.86859072e-01 -3.10140458e-01]
 ...
 [-8.20385295e-01  7.18395402e-01  1.29108539e-01 ... -6.76074358e-01
  -3.03054447e-01  3.36968791e+00]
 [-1.60664807e-01  7.18395402e-01 -3.94038849e-01 ...  3.95710113e+00
  -2.04586569e-01 -1.68298262e-03]
 [-3.80571637e-01  7.18395402e-01  3.03491001e-01 ... -1.68694910e-01
   7.69079350e-01  1.22088302e+00]]

Model Building

In [28]:
# now build the model

from sklearn.model_selection import train_test_split

#split the data into test and train 
X_train, X_test, Y_train, Y_test= train_test_split(X,Y, test_size=0.3,random_state=10)

Logistic Regression

In [29]:
from sklearn.linear_model import LogisticRegression
#create a model
classifier=LogisticRegression()
#fitting training data to the model
classifier.fit(X_train,Y_train)

Y_pred=classifier.predict(X_test)

print(Y_pred)
[0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 1. 1. 1. 0. 1. 1. 0. 1. 0. 1. 0. 1.
 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 1. 0. 0. 0. 1. 0. 1. 1. 1. 1.
 1. 1. 0. 1. 1. 1. 0. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0.
 0. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0.
 1. 0. 1. 0. 0. 1. 0. 1. 0. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 0.
 1. 0. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.
 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 1.
 0. 0. 1. 0. 1. 1. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 0. 1. 1. 1. 0. 1. 1. 1.
 1. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 0.
 1. 1. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 1. 1. 0. 1. 1. 1. 1.
 1. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1. 1. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0.
 0. 1. 0. 1. 1. 1. 1. 0. 0. 0. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1.
 1. 0. 1. 1. 0. 1. 1. 0. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1.
 1. 0. 1. 1. 1. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1.
 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 0. 1. 1. 1. 1. 0. 1.
 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 1. 1.]
In [31]:
# evaluating the model==> 

from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

cfm=confusion_matrix(Y_test,Y_pred)
print(cfm)

print("Classification report: ")

print(classification_report(Y_test,Y_pred))

acc=accuracy_score(Y_test, Y_pred)
print("Accuracy of the model: ",acc)
[[121  34]
 [ 40 201]]
Classification report: 
              precision    recall  f1-score   support

         0.0       0.75      0.78      0.77       155
         1.0       0.86      0.83      0.84       241

    accuracy                           0.81       396
   macro avg       0.80      0.81      0.81       396
weighted avg       0.81      0.81      0.81       396

Accuracy of the model:  0.8131313131313131
In [32]:
#since the value of f1 precision and recall are less for class 1 so tunning is required
In [33]:
# tunning model ->
In [34]:
#store the predicted probablities
y_pred_prob = classifier.predict_proba(X_test)
print(y_pred_prob)
[[5.83290188e-01 4.16709812e-01]
 [3.62174809e-01 6.37825191e-01]
 [2.90964954e-01 7.09035046e-01]
 [3.90193608e-01 6.09806392e-01]
 [4.61144112e-01 5.38855888e-01]
 [1.32022296e-01 8.67977704e-01]
 [1.27109269e-05 9.99987289e-01]
 [5.73747161e-01 4.26252839e-01]
 [1.95476065e-06 9.99998045e-01]
 [3.24992577e-01 6.75007423e-01]
 [5.75601581e-01 4.24398419e-01]
 [6.60323248e-01 3.39676752e-01]
 [4.03574009e-01 5.96425991e-01]
 [2.69914221e-01 7.30085779e-01]
 [4.28395040e-01 5.71604960e-01]
 [6.52460617e-01 3.47539383e-01]
 [3.36700750e-01 6.63299250e-01]
 [3.98446825e-01 6.01553175e-01]
 [7.33425089e-01 2.66574911e-01]
 [4.19150737e-02 9.58084926e-01]
 [8.09831629e-01 1.90168371e-01]
 [3.46825442e-01 6.53174558e-01]
 [5.74641404e-01 4.25358596e-01]
 [2.61477914e-05 9.99973852e-01]
 [6.39741959e-02 9.36025804e-01]
 [4.35193872e-01 5.64806128e-01]
 [2.93125837e-01 7.06874163e-01]
 [3.70720224e-01 6.29279776e-01]
 [5.03224161e-06 9.99994968e-01]
 [5.10448829e-01 4.89551171e-01]
 [5.71517839e-01 4.28482161e-01]
 [5.45354270e-01 4.54645730e-01]
 [2.01948915e-03 9.97980511e-01]
 [7.50948699e-02 9.24905130e-01]
 [7.98359770e-03 9.92016402e-01]
 [1.85824727e-02 9.81417527e-01]
 [3.25458944e-01 6.74541056e-01]
 [7.33327044e-01 2.66672956e-01]
 [1.79777610e-01 8.20222390e-01]
 [7.06762301e-01 2.93237699e-01]
 [5.83197906e-01 4.16802094e-01]
 [6.98211212e-01 3.01788788e-01]
 [2.18945159e-02 9.78105484e-01]
 [5.58528591e-01 4.41471409e-01]
 [1.60017309e-02 9.83998269e-01]
 [0.00000000e+00 1.00000000e+00]
 [2.07429802e-04 9.99792570e-01]
 [2.69350863e-03 9.97306491e-01]
 [1.49439290e-04 9.99850561e-01]
 [2.30441278e-01 7.69558722e-01]
 [6.83158760e-01 3.16841240e-01]
 [2.58306178e-01 7.41693822e-01]
 [3.35575589e-01 6.64424411e-01]
 [4.45775599e-01 5.54224401e-01]
 [7.00812347e-01 2.99187653e-01]
 [5.99635776e-01 4.00364224e-01]
 [3.85176709e-04 9.99614823e-01]
 [8.06604368e-05 9.99919340e-01]
 [6.98153274e-01 3.01846726e-01]
 [8.47976730e-01 1.52023270e-01]
 [5.26275288e-04 9.99473725e-01]
 [1.40326021e-01 8.59673979e-01]
 [6.46703653e-01 3.53296347e-01]
 [4.85299924e-01 5.14700076e-01]
 [2.80975893e-02 9.71902411e-01]
 [4.02131269e-01 5.97868731e-01]
 [4.58784803e-05 9.99954122e-01]
 [4.47497363e-01 5.52502637e-01]
 [6.34437683e-01 3.65562317e-01]
 [5.15145826e-01 4.84854174e-01]
 [5.87755897e-01 4.12244103e-01]
 [5.30212298e-01 4.69787702e-01]
 [7.69283392e-01 2.30716608e-01]
 [2.84738694e-02 9.71526131e-01]
 [6.99394857e-03 9.93006051e-01]
 [3.86877110e-01 6.13122890e-01]
 [5.09541982e-01 4.90458018e-01]
 [6.16163808e-01 3.83836192e-01]
 [4.45966511e-01 5.54033489e-01]
 [2.22488304e-01 7.77511696e-01]
 [3.29112520e-01 6.70887480e-01]
 [2.32272936e-01 7.67727064e-01]
 [7.49095405e-02 9.25090459e-01]
 [3.17268163e-01 6.82731837e-01]
 [5.97372810e-01 4.02627190e-01]
 [3.69689836e-01 6.30310164e-01]
 [5.76892608e-01 4.23107392e-01]
 [6.77253174e-01 3.22746826e-01]
 [6.95464256e-01 3.04535744e-01]
 [5.99385250e-01 4.00614750e-01]
 [5.14737154e-01 4.85262846e-01]
 [1.33264642e-02 9.86673536e-01]
 [1.88945694e-03 9.98110543e-01]
 [7.17408079e-01 2.82591921e-01]
 [1.89310176e-01 8.10689824e-01]
 [7.80205644e-01 2.19794356e-01]
 [2.29817503e-01 7.70182497e-01]
 [8.55087418e-01 1.44912582e-01]
 [3.23995729e-01 6.76004271e-01]
 [7.57886445e-01 2.42113555e-01]
 [5.93354462e-01 4.06645538e-01]
 [1.51951035e-02 9.84804896e-01]
 [6.75599114e-01 3.24400886e-01]
 [2.74330901e-01 7.25669099e-01]
 [7.34309796e-01 2.65690204e-01]
 [1.30873790e-01 8.69126210e-01]
 [1.21078015e-02 9.87892199e-01]
 [7.53250159e-01 2.46749841e-01]
 [6.00681360e-01 3.99318640e-01]
 [1.90924868e-01 8.09075132e-01]
 [3.76950291e-01 6.23049709e-01]
 [7.78435141e-04 9.99221565e-01]
 [3.90698601e-01 6.09301399e-01]
 [3.40813276e-01 6.59186724e-01]
 [1.96569049e-01 8.03430951e-01]
 [4.19055260e-01 5.80944740e-01]
 [6.85116261e-01 3.14883739e-01]
 [6.29590687e-01 3.70409313e-01]
 [2.63831251e-07 9.99999736e-01]
 [5.30212015e-01 4.69787985e-01]
 [7.17316437e-02 9.28268356e-01]
 [6.32537086e-01 3.67462914e-01]
 [5.18896888e-02 9.48110311e-01]
 [5.45238327e-01 4.54761673e-01]
 [1.25305273e-01 8.74694727e-01]
 [1.28682936e-04 9.99871317e-01]
 [1.91171350e-01 8.08828650e-01]
 [7.08618965e-01 2.91381035e-01]
 [3.57211030e-01 6.42788970e-01]
 [8.64714732e-02 9.13528527e-01]
 [3.76210549e-01 6.23789451e-01]
 [3.65504994e-05 9.99963450e-01]
 [5.33252229e-01 4.66747771e-01]
 [8.32257593e-01 1.67742407e-01]
 [3.43983425e-08 9.99999966e-01]
 [4.05840385e-01 5.94159615e-01]
 [3.48447647e-03 9.96515524e-01]
 [3.17794361e-01 6.82205639e-01]
 [2.49480363e-01 7.50519637e-01]
 [1.33387374e-01 8.66612626e-01]
 [4.87851193e-01 5.12148807e-01]
 [4.06040245e-01 5.93959755e-01]
 [4.41340609e-01 5.58659391e-01]
 [5.70959458e-01 4.29040542e-01]
 [3.60247128e-01 6.39752872e-01]
 [4.31907570e-01 5.68092430e-01]
 [3.56607927e-01 6.43392073e-01]
 [2.70894418e-14 1.00000000e+00]
 [4.52079291e-02 9.54792071e-01]
 [5.55399740e-01 4.44600260e-01]
 [6.35312054e-01 3.64687946e-01]
 [1.63045443e-01 8.36954557e-01]
 [5.26824191e-05 9.99947318e-01]
 [3.43052287e-05 9.99965695e-01]
 [7.88570782e-01 2.11429218e-01]
 [2.29759686e-05 9.99977024e-01]
 [8.47171415e-03 9.91528286e-01]
 [6.66137276e-01 3.33862724e-01]
 [4.03169789e-01 5.96830211e-01]
 [3.82692047e-01 6.17307953e-01]
 [4.97190813e-01 5.02809187e-01]
 [2.94416009e-01 7.05583991e-01]
 [2.82616153e-11 1.00000000e+00]
 [6.38271648e-01 3.61728352e-01]
 [6.19308910e-01 3.80691090e-01]
 [7.41561937e-01 2.58438063e-01]
 [6.14179890e-01 3.85820110e-01]
 [5.19682419e-01 4.80317581e-01]
 [7.13899622e-01 2.86100378e-01]
 [7.32029072e-01 2.67970928e-01]
 [1.99786840e-02 9.80021316e-01]
 [2.38267445e-01 7.61732555e-01]
 [4.28743865e-01 5.71256135e-01]
 [7.09230118e-01 2.90769882e-01]
 [6.41664108e-05 9.99935834e-01]
 [6.12271283e-01 3.87728717e-01]
 [3.07621244e-01 6.92378756e-01]
 [5.71626875e-01 4.28373125e-01]
 [4.66017124e-01 5.33982876e-01]
 [1.50929629e-02 9.84907037e-01]
 [5.60397866e-06 9.99994396e-01]
 [6.19639867e-01 3.80360133e-01]
 [5.68611412e-01 4.31388588e-01]
 [4.55841695e-01 5.44158305e-01]
 [0.00000000e+00 1.00000000e+00]
 [7.16033843e-01 2.83966157e-01]
 [2.35428039e-01 7.64571961e-01]
 [1.03768105e-11 1.00000000e+00]
 [6.64956073e-01 3.35043927e-01]
 [6.35506648e-01 3.64493352e-01]
 [1.21403941e-02 9.87859606e-01]
 [1.63627082e-05 9.99983637e-01]
 [6.41448476e-01 3.58551524e-01]
 [5.29163462e-01 4.70836538e-01]
 [1.21516130e-11 1.00000000e+00]
 [7.62429202e-01 2.37570798e-01]
 [3.15377523e-03 9.96846225e-01]
 [4.85678773e-01 5.14321227e-01]
 [5.55875681e-01 4.44124319e-01]
 [3.87788938e-02 9.61221106e-01]
 [6.79669125e-01 3.20330875e-01]
 [3.30651605e-04 9.99669348e-01]
 [5.25707599e-01 4.74292401e-01]
 [5.81691583e-01 4.18308417e-01]
 [7.96805853e-01 2.03194147e-01]
 [5.43669226e-04 9.99456331e-01]
 [2.78165752e-01 7.21834248e-01]
 [5.45177846e-01 4.54822154e-01]
 [7.88513168e-01 2.11486832e-01]
 [2.00594316e-02 9.79940568e-01]
 [2.81331131e-01 7.18668869e-01]
 [2.14156265e-04 9.99785844e-01]
 [5.75666473e-01 4.24333527e-01]
 [2.75727463e-01 7.24272537e-01]
 [2.57597595e-01 7.42402405e-01]
 [0.00000000e+00 1.00000000e+00]
 [1.28435892e-02 9.87156411e-01]
 [4.65154889e-01 5.34845111e-01]
 [2.33312803e-06 9.99997667e-01]
 [1.88479275e-01 8.11520725e-01]
 [6.71587663e-01 3.28412337e-01]
 [5.30240525e-01 4.69759475e-01]
 [2.11955767e-04 9.99788044e-01]
 [6.25206719e-01 3.74793281e-01]
 [5.61825857e-01 4.38174143e-01]
 [3.48526386e-01 6.51473614e-01]
 [4.73831197e-01 5.26168803e-01]
 [7.90561372e-01 2.09438628e-01]
 [4.15662805e-01 5.84337195e-01]
 [1.40501452e-03 9.98594985e-01]
 [2.55990558e-01 7.44009442e-01]
 [4.94556624e-01 5.05443376e-01]
 [7.88016501e-01 2.11983499e-01]
 [6.11868563e-03 9.93881314e-01]
 [1.13711169e-02 9.88628883e-01]
 [4.40689677e-01 5.59310323e-01]
 [7.33751949e-01 2.66248051e-01]
 [2.27156016e-02 9.77284398e-01]
 [4.18466373e-01 5.81533627e-01]
 [5.46755901e-01 4.53244099e-01]
 [4.29685111e-01 5.70314889e-01]
 [5.33449082e-03 9.94665509e-01]
 [6.38205909e-01 3.61794091e-01]
 [5.67227911e-01 4.32772089e-01]
 [2.06292992e-01 7.93707008e-01]
 [3.74024421e-06 9.99996260e-01]
 [6.37720987e-10 9.99999999e-01]
 [8.29004639e-01 1.70995361e-01]
 [7.83727955e-01 2.16272045e-01]
 [4.01477087e-01 5.98522913e-01]
 [6.78082110e-01 3.21917890e-01]
 [7.90451736e-01 2.09548264e-01]
 [4.66588041e-01 5.33411959e-01]
 [1.75030510e-05 9.99982497e-01]
 [6.38422528e-01 3.61577472e-01]
 [5.92507112e-01 4.07492888e-01]
 [1.30977565e-01 8.69022435e-01]
 [2.73162286e-01 7.26837714e-01]
 [1.24923999e-01 8.75076001e-01]
 [5.40419206e-01 4.59580794e-01]
 [2.50645320e-05 9.99974935e-01]
 [3.54201531e-01 6.45798469e-01]
 [4.64365641e-01 5.35634359e-01]
 [2.56251735e-04 9.99743748e-01]
 [2.89872093e-01 7.10127907e-01]
 [7.57424313e-01 2.42575687e-01]
 [4.74320398e-01 5.25679602e-01]
 [8.21163603e-01 1.78836397e-01]
 [2.82188395e-03 9.97178116e-01]
 [5.40140474e-01 4.59859526e-01]
 [7.10225579e-04 9.99289774e-01]
 [6.85689489e-01 3.14310511e-01]
 [6.73768143e-01 3.26231857e-01]
 [4.93704172e-01 5.06295828e-01]
 [2.89186248e-01 7.10813752e-01]
 [3.63769987e-02 9.63623001e-01]
 [6.95618268e-01 3.04381732e-01]
 [2.60383597e-01 7.39616403e-01]
 [4.96307420e-01 5.03692580e-01]
 [6.58069949e-01 3.41930051e-01]
 [0.00000000e+00 1.00000000e+00]
 [1.18988301e-01 8.81011699e-01]
 [5.07209824e-01 4.92790176e-01]
 [7.20406342e-01 2.79593658e-01]
 [5.53122544e-01 4.46877456e-01]
 [3.56016763e-01 6.43983237e-01]
 [5.54286119e-01 4.45713881e-01]
 [5.99524984e-01 4.00475016e-01]
 [7.63098566e-01 2.36901434e-01]
 [4.22511116e-01 5.77488884e-01]
 [6.60671295e-01 3.39328705e-01]
 [1.44439147e-05 9.99985556e-01]
 [3.98161631e-01 6.01838369e-01]
 [1.07217269e-03 9.98927827e-01]
 [5.88418203e-14 1.00000000e+00]
 [5.38812558e-01 4.61187442e-01]
 [5.28323570e-01 4.71676430e-01]
 [7.66721846e-01 2.33278154e-01]
 [3.79577694e-02 9.62042231e-01]
 [8.46972506e-01 1.53027494e-01]
 [1.48112690e-02 9.85188731e-01]
 [5.22031963e-01 4.77968037e-01]
 [3.66028098e-01 6.33971902e-01]
 [2.37860547e-01 7.62139453e-01]
 [1.33715549e-06 9.99998663e-01]
 [4.02842999e-01 5.97157001e-01]
 [6.01533650e-01 3.98466350e-01]
 [5.32951588e-01 4.67048412e-01]
 [5.29017358e-01 4.70982642e-01]
 [4.78905847e-01 5.21094153e-01]
 [8.88924802e-03 9.91110752e-01]
 [5.20264477e-03 9.94797355e-01]
 [9.15519033e-02 9.08448097e-01]
 [5.25375091e-01 4.74624909e-01]
 [3.84701373e-01 6.15298627e-01]
 [5.41446814e-05 9.99945855e-01]
 [5.51244112e-01 4.48755888e-01]
 [2.22044605e-16 1.00000000e+00]
 [3.85254341e-03 9.96147457e-01]
 [6.21561614e-01 3.78438386e-01]
 [3.66993937e-01 6.33006063e-01]
 [5.36501705e-01 4.63498295e-01]
 [3.71249077e-01 6.28750923e-01]
 [5.96252237e-01 4.03747763e-01]
 [6.95595509e-01 3.04404491e-01]
 [3.78289327e-01 6.21710673e-01]
 [8.00322451e-01 1.99677549e-01]
 [5.61706064e-01 4.38293936e-01]
 [6.63821939e-01 3.36178061e-01]
 [6.36478318e-01 3.63521682e-01]
 [5.44150020e-01 4.55849980e-01]
 [5.87776683e-01 4.12223317e-01]
 [5.41500209e-01 4.58499791e-01]
 [3.72071401e-01 6.27928599e-01]
 [7.27411913e-01 2.72588087e-01]
 [4.52586437e-01 5.47413563e-01]
 [4.19029372e-01 5.80970628e-01]
 [7.66183045e-01 2.33816955e-01]
 [3.00010208e-02 9.69998979e-01]
 [1.00529144e-02 9.89947086e-01]
 [3.85726652e-01 6.14273348e-01]
 [8.02155938e-01 1.97844062e-01]
 [0.00000000e+00 1.00000000e+00]
 [8.10715313e-01 1.89284687e-01]
 [7.31151416e-01 2.68848584e-01]
 [1.08355641e-01 8.91644359e-01]
 [7.07172165e-01 2.92827835e-01]
 [1.13386113e-01 8.86613887e-01]
 [5.05340375e-01 4.94659625e-01]
 [6.46549289e-01 3.53450711e-01]
 [5.79440659e-01 4.20559341e-01]
 [5.38417559e-01 4.61582441e-01]
 [1.13706673e-01 8.86293327e-01]
 [7.62492751e-01 2.37507249e-01]
 [0.00000000e+00 1.00000000e+00]
 [7.49892895e-01 2.50107105e-01]
 [1.61522778e-03 9.98384772e-01]
 [4.81790101e-01 5.18209899e-01]
 [6.88792930e-01 3.11207070e-01]
 [9.27876646e-02 9.07212335e-01]
 [3.80384613e-12 1.00000000e+00]
 [4.64983481e-01 5.35016519e-01]
 [5.81987442e-01 4.18012558e-01]
 [7.91542796e-01 2.08457204e-01]
 [5.44186218e-04 9.99455814e-01]
 [4.21631337e-01 5.78368663e-01]
 [8.13159180e-02 9.18684082e-01]
 [1.28434458e-02 9.87156554e-01]
 [3.92583841e-01 6.07416159e-01]
 [2.37539316e-01 7.62460684e-01]
 [6.33747814e-03 9.93662522e-01]
 [2.75090187e-05 9.99972491e-01]
 [6.80498945e-01 3.19501055e-01]
 [6.73000064e-01 3.26999936e-01]
 [5.57068584e-01 4.42931416e-01]
 [3.97459843e-14 1.00000000e+00]
 [2.85417277e-01 7.14582723e-01]
 [7.47774487e-01 2.52225513e-01]
 [4.54792163e-01 5.45207837e-01]
 [6.09541749e-02 9.39045825e-01]
 [5.73542225e-04 9.99426458e-01]
 [3.93413190e-02 9.60658681e-01]
 [8.83963239e-01 1.16036761e-01]
 [4.18741374e-01 5.81258626e-01]
 [5.39984692e-01 4.60015308e-01]
 [6.16309874e-01 3.83690126e-01]
 [6.83336455e-01 3.16663545e-01]
 [7.53538098e-01 2.46461902e-01]
 [1.43135108e-01 8.56864892e-01]
 [8.67927523e-01 1.32072477e-01]
 [4.11158412e-01 5.88841588e-01]
 [4.07417394e-01 5.92582606e-01]
 [2.90009940e-01 7.09990060e-01]
 [7.46453031e-01 2.53546969e-01]
 [3.94061844e-01 6.05938156e-01]
 [4.97989235e-01 5.02010765e-01]]
In [35]:
y_pred_class=[]
for value in y_pred_prob[:,1]:
    if value > 0.44:
        y_pred_class.append(1)
    else:
        y_pred_class.append(0)
#print(y_pred_class)
In [36]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

cfm=confusion_matrix(Y_test,y_pred_class)
print(cfm)

print("Classification report: ")

print(classification_report(Y_test,y_pred_class))

acc=accuracy_score(Y_test, y_pred_class)
print("Accuracy of the model: ",acc)
#print(classification_report(Y_test, y_pred_class))
[[ 98  57]
 [ 26 215]]
Classification report: 
              precision    recall  f1-score   support

         0.0       0.79      0.63      0.70       155
         1.0       0.79      0.89      0.84       241

    accuracy                           0.79       396
   macro avg       0.79      0.76      0.77       396
weighted avg       0.79      0.79      0.79       396

Accuracy of the model:  0.7904040404040404
In [37]:
for a in np.arange(0.4,0.61,0.01):
    predict_mine = np.where(y_pred_prob[:,1] > a, 1, 0)
    cfm=confusion_matrix(Y_test, predict_mine)
    total_err=cfm[0,1]+cfm[1,0]
    print("Errors at threshold ", a, ":",total_err, " , type 2 error :", 
          cfm[1,0]," , type 1 error:", cfm[0,1])

# while finalizing a threshold focus upon reducing the total error compare to the base model 
# and then finalize that thershold which give you less type 2 error
Errors at threshold  0.4 : 89  , type 2 error : 16  , type 1 error: 73
Errors at threshold  0.41000000000000003 : 82  , type 2 error : 16  , type 1 error: 66
Errors at threshold  0.42000000000000004 : 80  , type 2 error : 18  , type 1 error: 62
Errors at threshold  0.43000000000000005 : 85  , type 2 error : 25  , type 1 error: 60
Errors at threshold  0.44000000000000006 : 83  , type 2 error : 26  , type 1 error: 57
Errors at threshold  0.45000000000000007 : 80  , type 2 error : 28  , type 1 error: 52
Errors at threshold  0.4600000000000001 : 76  , type 2 error : 30  , type 1 error: 46
Errors at threshold  0.4700000000000001 : 77  , type 2 error : 35  , type 1 error: 42
Errors at threshold  0.4800000000000001 : 73  , type 2 error : 36  , type 1 error: 37
Errors at threshold  0.4900000000000001 : 73  , type 2 error : 38  , type 1 error: 35
Errors at threshold  0.5000000000000001 : 74  , type 2 error : 40  , type 1 error: 34
Errors at threshold  0.5100000000000001 : 75  , type 2 error : 43  , type 1 error: 32
Errors at threshold  0.5200000000000001 : 75  , type 2 error : 45  , type 1 error: 30
Errors at threshold  0.5300000000000001 : 74  , type 2 error : 46  , type 1 error: 28
Errors at threshold  0.5400000000000001 : 72  , type 2 error : 48  , type 1 error: 24
Errors at threshold  0.5500000000000002 : 75  , type 2 error : 51  , type 1 error: 24
Errors at threshold  0.5600000000000002 : 74  , type 2 error : 53  , type 1 error: 21
Errors at threshold  0.5700000000000002 : 76  , type 2 error : 55  , type 1 error: 21
Errors at threshold  0.5800000000000002 : 77  , type 2 error : 58  , type 1 error: 19
Errors at threshold  0.5900000000000002 : 77  , type 2 error : 61  , type 1 error: 16
Errors at threshold  0.6000000000000002 : 77  , type 2 error : 65  , type 1 error: 12

SVM

In [44]:
from sklearn.svm import SVC

svc_model = SVC(kernel = 'rbf',gamma=0.1,C=5)
svc_model.fit(X_train,Y_train)
Y_pred = svc_model.predict(X_test)
print(list(Y_pred))
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0]
In [45]:
svc_model.score(X_train,Y_train)
Out[45]:
0.8223185265438786
In [46]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

cfm=confusion_matrix(Y_test,y_pred_class)
print(cfm)

print("Classification report: ")

print(classification_report(Y_test,y_pred_class))

acc=accuracy_score(Y_test, y_pred_class)
print("Accuracy of the model: ",acc)
#print(classification_report(Y_test, y_pred_class))
[[ 98  57]
 [ 26 215]]
Classification report: 
              precision    recall  f1-score   support

         0.0       0.79      0.63      0.70       155
         1.0       0.79      0.89      0.84       241

    accuracy                           0.79       396
   macro avg       0.79      0.76      0.77       396
weighted avg       0.79      0.79      0.79       396

Accuracy of the model:  0.7904040404040404

KNN

In [47]:
# predicting using the KNeighbors classifier

from sklearn.neighbors import KNeighborsClassifier
model_KNN = KNeighborsClassifier(n_neighbors = int(np.sqrt(len(X_train))),metric='euclidean')

#euclidean,manhattan,minkowski
#fit the model on the data and predict the values
model_KNN.fit(X_train,Y_train)

Y_pred = model_KNN.predict(X_test)
print(list(zip(Y_test,Y_pred)))
[(0.0, 1.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0), (0.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 1.0), (1.0, 0.0), (0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (0.0, 0.0), (1.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 1.0), (0.0, 0.0), (0.0, 1.0), (1.0, 0.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 0.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 0.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (1.0, 0.0), (0.0, 1.0), (0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (0.0, 0.0), (0.0, 1.0), (0.0, 1.0), (0.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 1.0)]
In [48]:
np.sqrt(len(X_train))
Out[48]:
30.380915061926625
In [49]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

cfm=confusion_matrix(Y_test,Y_pred)
print(cfm)

print("Classification report: ")

print(classification_report(Y_test,Y_pred))

acc=accuracy_score(Y_test, Y_pred)
print("Accuracy of the model: ",acc)
[[ 93  62]
 [ 64 177]]
Classification report: 
              precision    recall  f1-score   support

         0.0       0.59      0.60      0.60       155
         1.0       0.74      0.73      0.74       241

    accuracy                           0.68       396
   macro avg       0.67      0.67      0.67       396
weighted avg       0.68      0.68      0.68       396

Accuracy of the model:  0.6818181818181818
In [51]:
from sklearn.metrics import accuracy_score
my_dict={}
for K in range(1,100):
    model_KNN = KNeighborsClassifier(n_neighbors=K,metric="euclidean")
    model_KNN.fit(X_train, Y_train) 
    Y_pred = model_KNN.predict(X_test)
    print ("Accuracy is ", accuracy_score(Y_test,Y_pred), "for K-Value:",K)
    my_dict[K]=accuracy_score(Y_test,Y_pred)
Accuracy is  0.6515151515151515 for K-Value: 1
Accuracy is  0.6464646464646465 for K-Value: 2
Accuracy is  0.6994949494949495 for K-Value: 3
Accuracy is  0.6691919191919192 for K-Value: 4
Accuracy is  0.6994949494949495 for K-Value: 5
Accuracy is  0.6868686868686869 for K-Value: 6
Accuracy is  0.6792929292929293 for K-Value: 7
Accuracy is  0.696969696969697 for K-Value: 8
Accuracy is  0.696969696969697 for K-Value: 9
Accuracy is  0.6792929292929293 for K-Value: 10
Accuracy is  0.6818181818181818 for K-Value: 11
Accuracy is  0.6666666666666666 for K-Value: 12
Accuracy is  0.6944444444444444 for K-Value: 13
Accuracy is  0.6792929292929293 for K-Value: 14
Accuracy is  0.702020202020202 for K-Value: 15
Accuracy is  0.6893939393939394 for K-Value: 16
Accuracy is  0.6919191919191919 for K-Value: 17
Accuracy is  0.6868686868686869 for K-Value: 18
Accuracy is  0.6843434343434344 for K-Value: 19
Accuracy is  0.6843434343434344 for K-Value: 20
Accuracy is  0.6843434343434344 for K-Value: 21
Accuracy is  0.6919191919191919 for K-Value: 22
Accuracy is  0.6792929292929293 for K-Value: 23
Accuracy is  0.6742424242424242 for K-Value: 24
Accuracy is  0.6767676767676768 for K-Value: 25
Accuracy is  0.6742424242424242 for K-Value: 26
Accuracy is  0.6742424242424242 for K-Value: 27
Accuracy is  0.6767676767676768 for K-Value: 28
Accuracy is  0.696969696969697 for K-Value: 29
Accuracy is  0.6818181818181818 for K-Value: 30
Accuracy is  0.6919191919191919 for K-Value: 31
Accuracy is  0.6944444444444444 for K-Value: 32
Accuracy is  0.6818181818181818 for K-Value: 33
Accuracy is  0.6843434343434344 for K-Value: 34
Accuracy is  0.6742424242424242 for K-Value: 35
Accuracy is  0.6843434343434344 for K-Value: 36
Accuracy is  0.6893939393939394 for K-Value: 37
Accuracy is  0.6893939393939394 for K-Value: 38
Accuracy is  0.6919191919191919 for K-Value: 39
Accuracy is  0.6893939393939394 for K-Value: 40
Accuracy is  0.696969696969697 for K-Value: 41
Accuracy is  0.702020202020202 for K-Value: 42
Accuracy is  0.6994949494949495 for K-Value: 43
Accuracy is  0.702020202020202 for K-Value: 44
Accuracy is  0.6994949494949495 for K-Value: 45
Accuracy is  0.6919191919191919 for K-Value: 46
Accuracy is  0.7045454545454546 for K-Value: 47
Accuracy is  0.6893939393939394 for K-Value: 48
Accuracy is  0.6919191919191919 for K-Value: 49
Accuracy is  0.6944444444444444 for K-Value: 50
Accuracy is  0.696969696969697 for K-Value: 51
Accuracy is  0.6994949494949495 for K-Value: 52
Accuracy is  0.7070707070707071 for K-Value: 53
Accuracy is  0.6944444444444444 for K-Value: 54
Accuracy is  0.702020202020202 for K-Value: 55
Accuracy is  0.702020202020202 for K-Value: 56
Accuracy is  0.6944444444444444 for K-Value: 57
Accuracy is  0.6919191919191919 for K-Value: 58
Accuracy is  0.6818181818181818 for K-Value: 59
Accuracy is  0.6767676767676768 for K-Value: 60
Accuracy is  0.6919191919191919 for K-Value: 61
Accuracy is  0.6893939393939394 for K-Value: 62
Accuracy is  0.6893939393939394 for K-Value: 63
Accuracy is  0.6843434343434344 for K-Value: 64
Accuracy is  0.6843434343434344 for K-Value: 65
Accuracy is  0.6767676767676768 for K-Value: 66
Accuracy is  0.6843434343434344 for K-Value: 67
Accuracy is  0.6717171717171717 for K-Value: 68
Accuracy is  0.6893939393939394 for K-Value: 69
Accuracy is  0.6818181818181818 for K-Value: 70
Accuracy is  0.6792929292929293 for K-Value: 71
Accuracy is  0.6792929292929293 for K-Value: 72
Accuracy is  0.6792929292929293 for K-Value: 73
Accuracy is  0.6792929292929293 for K-Value: 74
Accuracy is  0.6843434343434344 for K-Value: 75
Accuracy is  0.6843434343434344 for K-Value: 76
Accuracy is  0.6792929292929293 for K-Value: 77
Accuracy is  0.6868686868686869 for K-Value: 78
Accuracy is  0.6792929292929293 for K-Value: 79
Accuracy is  0.6843434343434344 for K-Value: 80
Accuracy is  0.6742424242424242 for K-Value: 81
Accuracy is  0.6818181818181818 for K-Value: 82
Accuracy is  0.6767676767676768 for K-Value: 83
Accuracy is  0.6843434343434344 for K-Value: 84
Accuracy is  0.6717171717171717 for K-Value: 85
Accuracy is  0.6792929292929293 for K-Value: 86
Accuracy is  0.6818181818181818 for K-Value: 87
Accuracy is  0.6868686868686869 for K-Value: 88
Accuracy is  0.6666666666666666 for K-Value: 89
Accuracy is  0.6717171717171717 for K-Value: 90
Accuracy is  0.6590909090909091 for K-Value: 91
Accuracy is  0.6616161616161617 for K-Value: 92
Accuracy is  0.6540404040404041 for K-Value: 93
Accuracy is  0.6565656565656566 for K-Value: 94
Accuracy is  0.6565656565656566 for K-Value: 95
Accuracy is  0.6666666666666666 for K-Value: 96
Accuracy is  0.6666666666666666 for K-Value: 97
Accuracy is  0.6742424242424242 for K-Value: 98
Accuracy is  0.6691919191919192 for K-Value: 99
In [52]:
my_dict
Out[52]:
{1: 0.6515151515151515,
 2: 0.6464646464646465,
 3: 0.6994949494949495,
 4: 0.6691919191919192,
 5: 0.6994949494949495,
 6: 0.6868686868686869,
 7: 0.6792929292929293,
 8: 0.696969696969697,
 9: 0.696969696969697,
 10: 0.6792929292929293,
 11: 0.6818181818181818,
 12: 0.6666666666666666,
 13: 0.6944444444444444,
 14: 0.6792929292929293,
 15: 0.702020202020202,
 16: 0.6893939393939394,
 17: 0.6919191919191919,
 18: 0.6868686868686869,
 19: 0.6843434343434344,
 20: 0.6843434343434344,
 21: 0.6843434343434344,
 22: 0.6919191919191919,
 23: 0.6792929292929293,
 24: 0.6742424242424242,
 25: 0.6767676767676768,
 26: 0.6742424242424242,
 27: 0.6742424242424242,
 28: 0.6767676767676768,
 29: 0.696969696969697,
 30: 0.6818181818181818,
 31: 0.6919191919191919,
 32: 0.6944444444444444,
 33: 0.6818181818181818,
 34: 0.6843434343434344,
 35: 0.6742424242424242,
 36: 0.6843434343434344,
 37: 0.6893939393939394,
 38: 0.6893939393939394,
 39: 0.6919191919191919,
 40: 0.6893939393939394,
 41: 0.696969696969697,
 42: 0.702020202020202,
 43: 0.6994949494949495,
 44: 0.702020202020202,
 45: 0.6994949494949495,
 46: 0.6919191919191919,
 47: 0.7045454545454546,
 48: 0.6893939393939394,
 49: 0.6919191919191919,
 50: 0.6944444444444444,
 51: 0.696969696969697,
 52: 0.6994949494949495,
 53: 0.7070707070707071,
 54: 0.6944444444444444,
 55: 0.702020202020202,
 56: 0.702020202020202,
 57: 0.6944444444444444,
 58: 0.6919191919191919,
 59: 0.6818181818181818,
 60: 0.6767676767676768,
 61: 0.6919191919191919,
 62: 0.6893939393939394,
 63: 0.6893939393939394,
 64: 0.6843434343434344,
 65: 0.6843434343434344,
 66: 0.6767676767676768,
 67: 0.6843434343434344,
 68: 0.6717171717171717,
 69: 0.6893939393939394,
 70: 0.6818181818181818,
 71: 0.6792929292929293,
 72: 0.6792929292929293,
 73: 0.6792929292929293,
 74: 0.6792929292929293,
 75: 0.6843434343434344,
 76: 0.6843434343434344,
 77: 0.6792929292929293,
 78: 0.6868686868686869,
 79: 0.6792929292929293,
 80: 0.6843434343434344,
 81: 0.6742424242424242,
 82: 0.6818181818181818,
 83: 0.6767676767676768,
 84: 0.6843434343434344,
 85: 0.6717171717171717,
 86: 0.6792929292929293,
 87: 0.6818181818181818,
 88: 0.6868686868686869,
 89: 0.6666666666666666,
 90: 0.6717171717171717,
 91: 0.6590909090909091,
 92: 0.6616161616161617,
 93: 0.6540404040404041,
 94: 0.6565656565656566,
 95: 0.6565656565656566,
 96: 0.6666666666666666,
 97: 0.6666666666666666,
 98: 0.6742424242424242,
 99: 0.6691919191919192}
In [53]:
for k in my_dict:
    if my_dict[k]==max(my_dict.values()):
        print(k,':',my_dict[k])
53 : 0.7070707070707071
In [55]:
# predicting using the KNeighbors classifier

from sklearn.neighbors import KNeighborsClassifier
model_KNN = KNeighborsClassifier(n_neighbors = 53,metric='euclidean')

#euclidean,manhattan,minkowski
#fit the model on the data and predict the values
model_KNN.fit(X_train,Y_train)

Y_pred = model_KNN.predict(X_test)
print(list(zip(Y_test,Y_pred)))
[(0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 1.0), (1.0, 0.0), (0.0, 0.0), (0.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 1.0), (0.0, 0.0), (0.0, 1.0), (1.0, 0.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 0.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 1.0), (0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 0.0), (0.0, 1.0), (0.0, 1.0), (0.0, 1.0), (0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 0.0), (0.0, 0.0), (1.0, 1.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0), (0.0, 1.0), (0.0, 1.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0), (0.0, 0.0), (1.0, 1.0), (0.0, 1.0)]
In [56]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

cfm=confusion_matrix(Y_test,Y_pred)
print(cfm)

print("Classification report: ")

print(classification_report(Y_test,Y_pred))

acc=accuracy_score(Y_test, Y_pred)
print("Accuracy of the model: ",acc)
[[ 80  75]
 [ 41 200]]
Classification report: 
              precision    recall  f1-score   support

         0.0       0.66      0.52      0.58       155
         1.0       0.73      0.83      0.78       241

    accuracy                           0.71       396
   macro avg       0.69      0.67      0.68       396
weighted avg       0.70      0.71      0.70       396

Accuracy of the model:  0.7070707070707071

Decision Tree Classifier

In [67]:
# predicting using the decision tree classifier

from sklearn.tree import DecisionTreeClassifier
model_DecisionTree = DecisionTreeClassifier(criterion ='gini',random_state=10)

#fit the model on the data nad predict thee values

model_DecisionTree.fit(X_train,Y_train)
Y_pred = model_DecisionTree.predict(X_test)

#print(Y_pred)
#print(list(zip(Y_test)))
In [68]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

cfm=confusion_matrix(Y_test,Y_pred)
print(cfm)

print("Classification report: ")

print(classification_report(Y_test,Y_pred))

acc=accuracy_score(Y_test, Y_pred)
print("Accuracy of the model: ",acc)
[[151   4]
 [  1 240]]
Classification report: 
              precision    recall  f1-score   support

         0.0       0.99      0.97      0.98       155
         1.0       0.98      1.00      0.99       241

    accuracy                           0.99       396
   macro avg       0.99      0.99      0.99       396
weighted avg       0.99      0.99      0.99       396

Accuracy of the model:  0.9873737373737373

XG Boost Classifier

In [71]:
from xgboost import XGBClassifier

model_GradientBoosting = XGBClassifier (n_estimators=100,
                                 random_state=10)
#fit the model on the data and predict the values
model_GradientBoosting.fit(X_train,Y_train)

Y_pred =model_GradientBoosting.predict(X_test)
In [72]:
from sklearn.metrics import confusion_matrix, accuracy_score,classification_report

#confusion matrix
print(confusion_matrix(Y_test,Y_pred))

print(classification_report(Y_test,Y_pred))
print(accuracy_score(Y_test,Y_pred))
[[153   2]
 [  1 240]]
              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99       155
         1.0       0.99      1.00      0.99       241

    accuracy                           0.99       396
   macro avg       0.99      0.99      0.99       396
weighted avg       0.99      0.99      0.99       396

0.9924242424242424

Random Forest Classifier

In [73]:
from sklearn.ensemble import RandomForestClassifier

model_RandomForest= RandomForestClassifier(n_estimators=100,random_state=10)
# fit the model on the data and predict the values 
model_RandomForest.fit(X_train,Y_train)
Y_pred=model_RandomForest.predict(X_test)
In [74]:
from sklearn.metrics import confusion_matrix, accuracy_score,classification_report

#confusion matrix
print(confusion_matrix(Y_test,Y_pred))

print(classification_report(Y_test,Y_pred))
print(accuracy_score(Y_test,Y_pred))
[[153   2]
 [  1 240]]
              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99       155
         1.0       0.99      1.00      0.99       241

    accuracy                           0.99       396
   macro avg       0.99      0.99      0.99       396
weighted avg       0.99      0.99      0.99       396

0.9924242424242424
In [75]:
model_RandomForest.score(X_train,Y_train)
Out[75]:
1.0

The Conclusion from Model-building

Accuracy_score gained by all the algorithms while building model

Logistic Regression - 0.81313

SVM - 0.79040

KNN - 0.68181

Decision Tree - 0.0.98737

Random Forest - 0.99242

After using all these records,we are able to build machine learninng model(Random forest is the best model with accuracy score - 0.99242) to predict wheather patient is suffering from Heart-Diesease or not

In [77]:
model_RandomForest.feature_importances_
Out[77]:
array([0.05098718, 0.01000454, 0.02091991, 0.02979896, 0.02157757,
       0.02742172, 0.25253591, 0.58675421])

From the above output, It is not much clear that which feature is important for that reason .we will now make a visualization of the same.

Plotting feature importances

In [78]:
sample = pd.DataFrame()

sample ['Columns'] = data.columns[0:-1]
sample ['Results'] = model_RandomForest.feature_importances_

sample.sort_values('Results',ascending=False)
Out[78]:
Columns Results
7 Troponin 0.586754
6 CK-MB 0.252536
0 Age 0.050987
3 Systolic blood pressure 0.029799
5 Blood sugar 0.027422
4 Diastolic blood pressure 0.021578
2 Heart rate 0.020920
1 Gender 0.010005
In [80]:
print(sample)
                    Columns   Results
0                       Age  0.050987
1                    Gender  0.010005
2                Heart rate  0.020920
3   Systolic blood pressure  0.029799
4  Diastolic blood pressure  0.021578
5               Blood sugar  0.027422
6                     CK-MB  0.252536
7                  Troponin  0.586754
In [81]:
df = pd.DataFrame(sample)
plot = df.plot.bar(x='Columns',y='Results',)

Troponin is a type of protein found in the muscles of your heart. Troponin isn't normally found in the blood. When heart muscles become damaged, troponin is sent into the bloodstream. As heart damage increases, greater amounts of troponin are released in the blood.

Here fron the above visualization ,it is clearly visible that TROPONIN is the feature_importance in the dataset

Saving Model - RandomForest

In [82]:
import pickle
In [83]:
# firstly dump the model with pickle
pickle.dump(model_RandomForest,open("Heart_model1.pkl",'wb'))
In [84]:
# load the saved model
loaded_model = pickle.load(open("Heart_model1.pkl", "rb"))

Predicting by taking input from user

In [100]:
input_data1 = (21,1,94,98,46,296,6.75,1.06)
input_data_as_numpy_array1 = np.asarray(input_data1)
input_reshape1 = input_data_as_numpy_array1.reshape(1,-1)
In [101]:
prediction1 = loaded_model.predict(input_reshape1)
print(prediction1)
[1.]
In [102]:
if (prediction1[0]==0):
    print("The person does not has Heart ")
else:
    print("The person has Heart")
The person has Heart
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [14]:
!pip install gradio
Requirement already satisfied: gradio in c:\users\anike\anaconda3\lib\site-packages (3.16.0)
Requirement already satisfied: markdown-it-py[linkify,plugins] in c:\users\anike\anaconda3\lib\site-packages (from gradio) (2.1.0)
Requirement already satisfied: websockets>=10.0 in c:\users\anike\anaconda3\lib\site-packages (from gradio) (10.4)
Requirement already satisfied: numpy in c:\users\anike\anaconda3\lib\site-packages (from gradio) (1.21.6)
Requirement already satisfied: pycryptodome in c:\users\anike\anaconda3\lib\site-packages (from gradio) (3.16.0)
Requirement already satisfied: fsspec in c:\users\anike\anaconda3\lib\site-packages (from gradio) (0.6.2)
Requirement already satisfied: pydub in c:\users\anike\anaconda3\lib\site-packages (from gradio) (0.25.1)
Requirement already satisfied: requests in c:\users\anike\anaconda3\lib\site-packages (from gradio) (2.22.0)
Requirement already satisfied: pillow in c:\users\anike\anaconda3\lib\site-packages (from gradio) (7.0.0)
Requirement already satisfied: aiohttp in c:\users\anike\anaconda3\lib\site-packages (from gradio) (3.8.3)
Requirement already satisfied: pandas in c:\users\anike\anaconda3\lib\site-packages (from gradio) (1.3.5)
Requirement already satisfied: altair>=4.2.0 in c:\users\anike\anaconda3\lib\site-packages (from gradio) (4.2.0)
Requirement already satisfied: python-multipart in c:\users\anike\anaconda3\lib\site-packages (from gradio) (0.0.5)
Requirement already satisfied: typing-extensions in c:\users\anike\anaconda3\lib\site-packages (from gradio) (4.6.3)
Requirement already satisfied: httpx in c:\users\anike\anaconda3\lib\site-packages (from gradio) (0.23.3)
Requirement already satisfied: pyyaml in c:\users\anike\anaconda3\lib\site-packages (from gradio) (5.3)
Requirement already satisfied: markupsafe in c:\users\anike\anaconda3\lib\site-packages (from gradio) (2.1.1)
Requirement already satisfied: fastapi in c:\users\anike\anaconda3\lib\site-packages (from gradio) (0.88.0)
Requirement already satisfied: matplotlib in c:\users\anike\appdata\roaming\python\python37\site-packages (from gradio) (3.5.3)
Requirement already satisfied: orjson in c:\users\anike\anaconda3\lib\site-packages (from gradio) (3.8.4)
Requirement already satisfied: pydantic in c:\users\anike\anaconda3\lib\site-packages (from gradio) (1.8.2)
Requirement already satisfied: uvicorn in c:\users\anike\anaconda3\lib\site-packages (from gradio) (0.20.0)
Requirement already satisfied: ffmpy in c:\users\anike\anaconda3\lib\site-packages (from gradio) (0.3.0)
Requirement already satisfied: jinja2 in c:\users\anike\anaconda3\lib\site-packages (from gradio) (3.0.1)
Requirement already satisfied: mdurl~=0.1 in c:\users\anike\anaconda3\lib\site-packages (from markdown-it-py[linkify,plugins]->gradio) (0.1.2)
Requirement already satisfied: linkify-it-py~=1.0; extra == "linkify" in c:\users\anike\anaconda3\lib\site-packages (from markdown-it-py[linkify,plugins]->gradio) (1.0.3)
Requirement already satisfied: mdit-py-plugins; extra == "plugins" in c:\users\anike\anaconda3\lib\site-packages (from markdown-it-py[linkify,plugins]->gradio) (0.3.3)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\anike\anaconda3\lib\site-packages (from requests->gradio) (2020.11.8)
Requirement already satisfied: idna<2.9,>=2.5 in c:\users\anike\anaconda3\lib\site-packages (from requests->gradio) (2.8)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\users\anike\anaconda3\lib\site-packages (from requests->gradio) (1.24.3)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\users\anike\anaconda3\lib\site-packages (from requests->gradio) (3.0.4)
Requirement already satisfied: attrs>=17.3.0 in c:\users\anike\anaconda3\lib\site-packages (from aiohttp->gradio) (19.3.0)
Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in c:\users\anike\anaconda3\lib\site-packages (from aiohttp->gradio) (4.0.2)
Requirement already satisfied: frozenlist>=1.1.1 in c:\users\anike\anaconda3\lib\site-packages (from aiohttp->gradio) (1.3.3)
Requirement already satisfied: aiosignal>=1.1.2 in c:\users\anike\anaconda3\lib\site-packages (from aiohttp->gradio) (1.3.1)
Requirement already satisfied: multidict<7.0,>=4.5 in c:\users\anike\anaconda3\lib\site-packages (from aiohttp->gradio) (6.0.4)
Requirement already satisfied: asynctest==0.13.0; python_version < "3.8" in c:\users\anike\anaconda3\lib\site-packages (from aiohttp->gradio) (0.13.0)
Requirement already satisfied: yarl<2.0,>=1.0 in c:\users\anike\anaconda3\lib\site-packages (from aiohttp->gradio) (1.8.2)
Requirement already satisfied: charset-normalizer<3.0,>=2.0 in c:\users\anike\anaconda3\lib\site-packages (from aiohttp->gradio) (2.0.4)
Requirement already satisfied: pytz>=2017.3 in c:\users\anike\anaconda3\lib\site-packages (from pandas->gradio) (2019.3)
Requirement already satisfied: python-dateutil>=2.7.3 in c:\users\anike\anaconda3\lib\site-packages (from pandas->gradio) (2.8.1)
Requirement already satisfied: entrypoints in c:\users\anike\anaconda3\lib\site-packages (from altair>=4.2.0->gradio) (0.3)
Requirement already satisfied: toolz in c:\users\anike\anaconda3\lib\site-packages (from altair>=4.2.0->gradio) (0.10.0)
Requirement already satisfied: jsonschema>=3.0 in c:\users\anike\anaconda3\lib\site-packages (from altair>=4.2.0->gradio) (3.2.0)
Requirement already satisfied: six>=1.4.0 in c:\users\anike\anaconda3\lib\site-packages (from python-multipart->gradio) (1.14.0)
Requirement already satisfied: rfc3986[idna2008]<2,>=1.3 in c:\users\anike\anaconda3\lib\site-packages (from httpx->gradio) (1.5.0)
Requirement already satisfied: sniffio in c:\users\anike\anaconda3\lib\site-packages (from httpx->gradio) (1.3.0)
Requirement already satisfied: httpcore<0.17.0,>=0.15.0 in c:\users\anike\anaconda3\lib\site-packages (from httpx->gradio) (0.16.3)
Requirement already satisfied: starlette==0.22.0 in c:\users\anike\anaconda3\lib\site-packages (from fastapi->gradio) (0.22.0)
Requirement already satisfied: pyparsing>=2.2.1 in c:\users\anike\anaconda3\lib\site-packages (from matplotlib->gradio) (2.4.7)
Requirement already satisfied: packaging>=20.0 in c:\users\anike\anaconda3\lib\site-packages (from matplotlib->gradio) (21.3)
Requirement already satisfied: cycler>=0.10 in c:\users\anike\anaconda3\lib\site-packages (from matplotlib->gradio) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\anike\appdata\roaming\python\python37\site-packages (from matplotlib->gradio) (4.38.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\anike\anaconda3\lib\site-packages (from matplotlib->gradio) (1.1.0)
Requirement already satisfied: click>=7.0 in c:\users\anike\anaconda3\lib\site-packages (from uvicorn->gradio) (8.1.3)
Requirement already satisfied: h11>=0.8 in c:\users\anike\anaconda3\lib\site-packages (from uvicorn->gradio) (0.14.0)
Requirement already satisfied: uc-micro-py in c:\users\anike\anaconda3\lib\site-packages (from linkify-it-py~=1.0; extra == "linkify"->markdown-it-py[linkify,plugins]->gradio) (1.0.1)
Requirement already satisfied: setuptools in c:\users\anike\anaconda3\lib\site-packages (from jsonschema>=3.0->altair>=4.2.0->gradio) (45.2.0.post20200210)
Requirement already satisfied: importlib-metadata; python_version < "3.8" in c:\users\anike\anaconda3\lib\site-packages (from jsonschema>=3.0->altair>=4.2.0->gradio) (4.11.4)
Requirement already satisfied: pyrsistent>=0.14.0 in c:\users\anike\anaconda3\lib\site-packages (from jsonschema>=3.0->altair>=4.2.0->gradio) (0.15.7)
Requirement already satisfied: anyio<5.0,>=3.0 in c:\users\anike\anaconda3\lib\site-packages (from httpcore<0.17.0,>=0.15.0->httpx->gradio) (3.6.2)
Requirement already satisfied: colorama; platform_system == "Windows" in c:\users\anike\anaconda3\lib\site-packages (from click>=7.0->uvicorn->gradio) (0.4.4)
Requirement already satisfied: zipp>=0.5 in c:\users\anike\anaconda3\lib\site-packages (from importlib-metadata; python_version < "3.8"->jsonschema>=3.0->altair>=4.2.0->gradio) (3.1.0)
In [15]:
import gradio as gr
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-15-43eca54f7d45> in <module>
----> 1 import gradio as gr

~\anaconda3\lib\site-packages\gradio\__init__.py in <module>
      1 import pkgutil
      2 
----> 3 import gradio.components as components
      4 import gradio.inputs as inputs
      5 import gradio.outputs as outputs

~\anaconda3\lib\site-packages\gradio\components.py in <module>
     32 from typing_extensions import Literal
     33 
---> 34 from gradio import media_data, processing_utils, utils
     35 from gradio.blocks import Block, BlockContext
     36 from gradio.context import Context

~\anaconda3\lib\site-packages\gradio\processing_utils.py in <module>
     21 from PIL import Image, ImageOps, PngImagePlugin
     22 
---> 23 from gradio import encryptor, utils
     24 
     25 with warnings.catch_warnings():

~\anaconda3\lib\site-packages\gradio\utils.py in <module>
     37 
     38 import aiohttp
---> 39 import fsspec.asyn
     40 import httpx
     41 import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'fsspec.asyn'
In [13]:
import fsspec
pip install fsspec
  File "<ipython-input-13-b9da1d31c0b6>", line 2
    pip install fsspec
              ^
SyntaxError: invalid syntax
In [1]:
def inference(age, sex, ch, cardio):
    s = 0 if sex=='female' else 1
    df = pd.DataFrame([[age, s, ch, cardio]], 
                      columns=['Age', 'Sex', 'Cholestoral (in mg/dl)', 
                               'Resting electrocardiographic results'])
    df = st.transform(df)
    pred = trainedmodel.predict_proba(df)[0]
    res = {'No Heart Desease': pred[0], 'Has Heart Desease': pred[1]}
    return res

sex = gr.Radio(['female', 'male'], label="Sex")
age = gr.Slider(minimum=1, maximum=100, value=22, label="Age")
ch = gr.Slider(minimum=120, maximum=560, value=200, label="Cholestoral (in mg/dl)")
cardio = gr.Radio([0, 1, 2], label="Resting electrocardiographic results")

gr.Interface(inference, [age, sex, ch, cardio], "label", live=True).launch(share=True) #, debug=True Use in Colab
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-4afed7f41535> in <module>
      9     return res
     10 
---> 11 sex = gr.Radio(['female', 'male'], label="Sex")
     12 age = gr.Slider(minimum=1, maximum=100, value=22, label="Age")
     13 ch = gr.Slider(minimum=120, maximum=560, value=200, label="Cholestoral (in mg/dl)")

NameError: name 'gr' is not defined
In [ ]: